This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the uscomp data set into the workspace and open its help page.
# The workspace is deliberately not cleared with rm(list = ls()): every object
# used below is assigned before it is read, and wiping the caller's
# environment is an unwanted side effect when the chunk is run interactively.
data(uscomp)
?uscomp
## starting httpd help server ... done
# Preview the first six rows of the data set.
head(uscomp, n = 6L)
## Assets Sales Market Value Profits Cash Flow
## Bell_Atlantic 19788 9084 10636 1092.9 2576.8
## Continental_Telecom 5074 2557 1892 239.9 578.3
## American_Electric_Power 13621 4848 4572 485.0 898.9
## Brooklyn_Union_Gas 1117 1038 478 59.7 91.7
## Central_Illinois_Publ._Serv. 1633 701 679 74.3 135.9
## Cleveland_Electric_Illum. 5651 1254 2002 310.7 407.9
## Employees Sector
## Bell_Atlantic 79.4 Communication
## Continental_Telecom 21.9 Communication
## American_Electric_Power 23.4 Energy
## Brooklyn_Union_Gas 3.8 Energy
## Central_Illinois_Publ._Serv. 2.8 Energy
## Cleveland_Electric_Illum. 6.2 Energy
# Store Sales as a numeric column. The conversion has to read from Sales
# itself: converting Assets here would silently replace every Sales value
# with the corresponding Assets value.
uscomp$Sales <- as.numeric(uscomp$Sales)

# Per-column summary: quantiles for numeric columns, counts for Sector.
summary(uscomp)
## Assets Sales Market Value Profits
## Min. : 223 Min. : 223 Min. : 53.0 Min. :-771.5
## 1st Qu.: 1122 1st Qu.: 1122 1st Qu.: 512.5 1st Qu.: 39.0
## Median : 2788 Median : 2788 Median : 944.0 Median : 70.5
## Mean : 5941 Mean : 5941 Mean : 3269.1 Mean : 209.8
## 3rd Qu.: 5802 3rd Qu.: 5802 3rd Qu.: 1961.5 3rd Qu.: 188.1
## Max. :52634 Max. :52634 Max. :95697.0 Max. :6555.0
##
## Cash Flow Employees Sector
## Min. :-651.90 Min. : 0.60 Finance :17
## 1st Qu.: 75.15 1st Qu.: 3.95 Energy :15
## Median : 133.30 Median : 15.40 Manufacturing:10
## Mean : 400.93 Mean : 37.60 Retail :10
## 3rd Qu.: 328.85 3rd Qu.: 48.50 HiTech : 8
## Max. :9874.00 Max. :400.20 Other : 7
## (Other) :12
You can also embed plots, for example:
# Correlation plot of the first six columns of uscomp (the numeric ones;
# the seventh column, Sector, is categorical).
mat_num <- as.matrix(uscomp[, 1:6])

corrplot(
  cor(mat_num),
  method  = "shade",
  type    = "upper",   # draw only the upper triangle
  bg      = "blue",
  title   = "Correlation matrix between numerical variables",
  is.corr = TRUE,
  cl.cex  = 0.8,       # colour-legend text size
  tl.cex  = 0.9,       # variable-label text size
  tl.col  = "black",
  tl.srt  = 15         # variable-label rotation, in degrees
)
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
# Build a stratified subsample of uscomp: from every sector keep (about) 2/5 of
# its companies, chosen at random without replacement.
# NOTE: no seed is set here, so a different subsample is drawn on every run.

# Number of companies per sector.
all_companies <- table(uscomp$Sector)

# Number of companies to keep per sector (2/5 of the sector, rounded).
sectors <- round(all_companies * 2 / 5)

# Sample each sector independently, then bind the pieces once at the end
# instead of growing a data frame inside a loop.
sector_samples <- lapply(seq_along(sectors), function(i) {
  sector_rows <- uscomp[which(uscomp$Sector == names(sectors)[i]), , drop = FALSE]
  keep <- sample.int(all_companies[[i]], sectors[[i]])
  sector_rows[keep, , drop = FALSE]
})
smaller_data <- do.call(rbind, sector_samples)
# Heatmap of the first six columns of the subsample, one row per sampled
# company and one column per variable.
mat <- smaller_data[1:6]

heatmaply(
  mat,
  dendrogram = "none",            # keep the original row/column order
  xlab = "Numerical variables",
  ylab = "Companies",
  main = "HeatMap",
  scale = "column",
  margins = c(60, 100, 40, 20),
  grid_color = "white",
  grid_width = 0.00001,
  titleX = TRUE,
  hide_colorbar = TRUE,
  branches_lwd = 0.1,
  label_names = c("Company", "Feature:", "Value"),
  fontsize_row = 5,
  fontsize_col = 5,
  labCol = colnames(mat),
  labRow = rownames(mat),
  # No trailing comma after the last argument: it would pass an empty
  # argument into heatmaply's `...`.
  heatmap_layers = theme(axis.line = element_blank())
)